{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "39329df3-1f99-4b11-9405-5969d52368a7",
   "metadata": {},
   "source": [
    "# XGboost & Optuna Example"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7d5f0ab-33cd-40f2-82e7-fb2747f04f89",
   "metadata": {},
   "source": [
    "Example showing how to use the Optuna library (https://optuna.readthedocs.io/en/stable/) for Bayesian hyperparameter optimization (via tree of parzen estimator)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scikit-learn: 1.0.2\n",
      "optuna      : 2.10.0\n",
      "xgboost     : 1.5.1\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sebastian/miniforge3/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  from pandas import MultiIndex, Int64Index\n"
     ]
    }
   ],
   "source": [
    "%load_ext watermark\n",
    "%watermark -p scikit-learn,optuna,xgboost"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train/Valid/Test sizes: 398 80 171\n"
     ]
    }
   ],
   "source": [
    "from sklearn import model_selection\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import datasets\n",
    "\n",
    "\n",
    "data = datasets.load_breast_cancer()\n",
    "X, y = data.data, data.target\n",
    "\n",
    "X_train, X_test, y_train, y_test = \\\n",
    "    train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n",
    "\n",
    "X_train_sub, X_valid, y_train_sub, y_valid = \\\n",
    "    train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n",
    "\n",
    "print('Train/Valid/Test sizes:', y_train.shape[0], y_valid.shape[0], y_test.shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0affc454-9f07-48e6-bcee-e6253d968247",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Optuna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9a6bb270-d2a1-4179-a770-39bad5a8332c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from xgboost import XGBClassifier\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "import numpy as np\n",
    "import optuna\n",
    "\n",
    "\n",
    "def optimization_objective(trial, X_train, y_train, cv=5):\n",
    "\n",
    "    \n",
    "    params =  {\n",
    "            \"n_estimators\": trial.suggest_categorical(\"n_estimators\", [30, 50, 100, 300]),\n",
    "            \"learning_rate\": trial.suggest_discrete_uniform(\"learning_rate\", [0.01]),\n",
    "            \"lambda\": trial.suggest_loguniform(\"lambda\", 1e-8, 1.0),\n",
    "            \"alpha\": trial.suggest_loguniform(\"alpha\", 1e-8, 1.0),\n",
    "    }\n",
    "    \n",
    "\n",
    "    cv_iterator = StratifiedKFold(n_splits=cv, shuffle=True, random_state=123)\n",
    "\n",
    "    cv_scores = np.zeros(cv)\n",
    "    for idx, (train_sub_idx, valid_idx) in enumerate(cv_iterator.split(X_train, y_train)):\n",
    "        \n",
    "        X_train_sub, X_valid = X_train[train_sub_idx], X_train[valid_idx]\n",
    "        y_train_sub, y_valid = y_train[train_sub_idx], y_train[valid_idx]\n",
    "        \n",
    "\n",
    "        model = XGBClassifier(**params, random_state=123, use_label_encoder=False)\n",
    "        \n",
    "        model.fit(\n",
    "            X_train_sub,\n",
    "            y_train_sub,\n",
    "            verbose=False,\n",
    "            eval_set=[(X_valid, y_valid)],\n",
    "            eval_metric=\"auc\",\n",
    "            early_stopping_rounds=100,\n",
    "        )\n",
    "        \n",
    "        preds = model.score(X_valid, y_valid)\n",
    "        \n",
    "        cv_scores[idx] = preds\n",
    "\n",
    "    return np.mean(cv_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a51829c6-234f-401f-84ed-a005f71d0150",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m[I 2022-02-13 12:21:10,936]\u001b[0m A new study created in memory with name: XGBoost Classifier\u001b[0m\n",
      "\u001b[33m[W 2022-02-13 12:21:10,937]\u001b[0m Trial 0 failed because of the following error: TypeError(\"suggest_discrete_uniform() missing 2 required positional arguments: 'high' and 'q'\")\u001b[0m\n",
      "Traceback (most recent call last):\n",
      "  File \"/Users/sebastian/miniforge3/lib/python3.9/site-packages/optuna/study/_optimize.py\", line 213, in _run_trial\n",
      "    value_or_values = func(trial)\n",
      "  File \"/var/folders/jg/tpqyh1fd5js5wsr1d138k3n40000gn/T/ipykernel_60279/3460748202.py\", line 4, in func\n",
      "    return optimization_objective(trial, X_train, y_train)\n",
      "  File \"/var/folders/jg/tpqyh1fd5js5wsr1d138k3n40000gn/T/ipykernel_60279/2851631413.py\", line 12, in optimization_objective\n",
      "    \"learning_rate\": trial.suggest_discrete_uniform(\"learning_rate\", [0.01]),\n",
      "TypeError: suggest_discrete_uniform() missing 2 required positional arguments: 'high' and 'q'\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "suggest_discrete_uniform() missing 2 required positional arguments: 'high' and 'q'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Input \u001b[0;32mIn [4]\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc\u001b[39m(trial):\n\u001b[1;32m      4\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m optimization_objective(trial, X_train, y_train)\n\u001b[0;32m----> 6\u001b[0m \u001b[43mstudy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_trials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniforge3/lib/python3.9/site-packages/optuna/study/study.py:400\u001b[0m, in \u001b[0;36mStudy.optimize\u001b[0;34m(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m    392\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m n_jobs \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m    393\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m    394\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`n_jobs` argument has been deprecated in v2.7.0. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    395\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis feature will be removed in v4.0.0. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    396\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSee https://github.com/optuna/optuna/releases/tag/v2.7.0.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    397\u001b[0m         \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m    398\u001b[0m     )\n\u001b[0;32m--> 400\u001b[0m \u001b[43m_optimize\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    401\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstudy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    402\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    403\u001b[0m \u001b[43m    \u001b[49m\u001b[43mn_trials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_trials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    404\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    405\u001b[0m \u001b[43m    \u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_jobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    406\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcatch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcatch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    407\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    408\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgc_after_trial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgc_after_trial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    409\u001b[0m \u001b[43m    \u001b[49m\u001b[43mshow_progress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshow_progress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniforge3/lib/python3.9/site-packages/optuna/study/_optimize.py:66\u001b[0m, in \u001b[0;36m_optimize\u001b[0;34m(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m     64\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m     65\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m n_jobs \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m---> 66\u001b[0m         \u001b[43m_optimize_sequential\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     67\u001b[0m \u001b[43m            \u001b[49m\u001b[43mstudy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     68\u001b[0m \u001b[43m            \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     69\u001b[0m \u001b[43m            \u001b[49m\u001b[43mn_trials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     70\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     71\u001b[0m \u001b[43m            \u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     72\u001b[0m \u001b[43m            \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     73\u001b[0m \u001b[43m            \u001b[49m\u001b[43mgc_after_trial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     74\u001b[0m \u001b[43m            \u001b[49m\u001b[43mreseed_sampler_rng\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     75\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtime_start\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     76\u001b[0m \u001b[43m            \u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     77\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     78\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     79\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m show_progress_bar:\n",
      "File \u001b[0;32m~/miniforge3/lib/python3.9/site-packages/optuna/study/_optimize.py:163\u001b[0m, in \u001b[0;36m_optimize_sequential\u001b[0;34m(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)\u001b[0m\n\u001b[1;32m    160\u001b[0m         \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m    162\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 163\u001b[0m     trial \u001b[38;5;241m=\u001b[39m \u001b[43m_run_trial\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstudy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    164\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m    165\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m\n",
      "File \u001b[0;32m~/miniforge3/lib/python3.9/site-packages/optuna/study/_optimize.py:264\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m    261\u001b[0m         \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mShould not reach.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    263\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m state \u001b[38;5;241m==\u001b[39m TrialState\u001b[38;5;241m.\u001b[39mFAIL \u001b[38;5;129;01mand\u001b[39;00m func_err \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(func_err, catch):\n\u001b[0;32m--> 264\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m func_err\n\u001b[1;32m    265\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m trial\n",
      "File \u001b[0;32m~/miniforge3/lib/python3.9/site-packages/optuna/study/_optimize.py:213\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m    210\u001b[0m     thread\u001b[38;5;241m.\u001b[39mstart()\n\u001b[1;32m    212\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 213\u001b[0m     value_or_values \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mTrialPruned \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    215\u001b[0m     \u001b[38;5;66;03m# TODO(mamu): Handle multi-objective cases.\u001b[39;00m\n\u001b[1;32m    216\u001b[0m     state \u001b[38;5;241m=\u001b[39m TrialState\u001b[38;5;241m.\u001b[39mPRUNED\n",
      "Input \u001b[0;32mIn [4]\u001b[0m, in \u001b[0;36mfunc\u001b[0;34m(trial)\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc\u001b[39m(trial):\n\u001b[0;32m----> 4\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43moptimization_objective\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n",
      "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36moptimization_objective\u001b[0;34m(trial, X_train, y_train, cv)\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moptimization_objective\u001b[39m(trial, X_train, y_train, cv\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m):\n\u001b[1;32m     10\u001b[0m     params \u001b[38;5;241m=\u001b[39m  {\n\u001b[1;32m     11\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_estimators\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial\u001b[38;5;241m.\u001b[39msuggest_categorical(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_estimators\u001b[39m\u001b[38;5;124m\"\u001b[39m, [\u001b[38;5;241m30\u001b[39m, \u001b[38;5;241m50\u001b[39m, \u001b[38;5;241m100\u001b[39m, \u001b[38;5;241m300\u001b[39m]),\n\u001b[0;32m---> 12\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlearning_rate\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[43mtrial\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuggest_discrete_uniform\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlearning_rate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m     13\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlambda\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial\u001b[38;5;241m.\u001b[39msuggest_loguniform(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlambda\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1e-8\u001b[39m, \u001b[38;5;241m1.0\u001b[39m),\n\u001b[1;32m     14\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malpha\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial\u001b[38;5;241m.\u001b[39msuggest_loguniform(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malpha\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1e-8\u001b[39m, \u001b[38;5;241m1.0\u001b[39m),\n\u001b[1;32m     15\u001b[0m     }\n\u001b[1;32m     18\u001b[0m     cv_iterator \u001b[38;5;241m=\u001b[39m StratifiedKFold(n_splits\u001b[38;5;241m=\u001b[39mcv, shuffle\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m123\u001b[39m)\n\u001b[1;32m     20\u001b[0m     cv_scores \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros(cv)\n",
      "\u001b[0;31mTypeError\u001b[0m: suggest_discrete_uniform() missing 2 required positional arguments: 'high' and 'q'"
     ]
    }
   ],
   "source": [
    "study = optuna.create_study(direction=\"maximize\", study_name=\"XGBoost Classifier\")\n",
    "\n",
    "def func(trial):\n",
    "    return optimization_objective(trial, X_train, y_train)\n",
    "\n",
    "study.optimize(func, n_trials=50);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c26399d-ebfc-4b06-86d9-36e49711e908",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Best CV accuracy: {study.best_value:.5f}\")\n",
    "print(\"Best params:\")\n",
    "\n",
    "for key, value in study.best_params.items():\n",
    "    print(f\"\\t{key}: {value}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83e99f85-9ce2-494e-99ea-20ab49dc0b15",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBClassifier(**study.best_params, random_state=123, use_label_encoder=False)\n",
    "model.fit(\n",
    "    X_train,\n",
    "    y_train,\n",
    "    verbose=False,\n",
    "    eval_set=[(X_valid, y_valid)],\n",
    "    eval_metric=\"auc\",\n",
    "    early_stopping_rounds=100,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbb610d8-4846-4e9f-a589-adacd0042603",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Training Accuracy: {model.score(X_train, y_train):0.2f}\")\n",
    "print(f\"Test Accuracy: {model.score(X_test, y_test):0.2f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
